suppressPackageStartupMessages(library(tidyverse))
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.3
devtools::load_all('~/Google Drive/My Drive/Scripts/R_packages/myUtilities/')
## ℹ Loading myUtilities
# Project root for this analysis. The string ends with "/" so the paste0()
# calls below produce valid paths without adding a separator.
wd <- "/Users/s-mitsutomi/My Drive (shuheimitsutomi@ric.u-tokyo.ac.jp)/Analysis/METTL2A/"
# NOTE(review): setwd() on an absolute, user-specific path makes the script
# non-portable; every path below is already built from `wd`.
setwd(wd)
# Output directory handed to the figure-saving helpers (outdir / save_outdir).
figdir <- paste0(wd, 'Figures/Espresso/Expression/')
# Output directory handed to export_tsv().
tabledir <- paste0(wd, 'Tables/Espresso/')
Read espresso deseq2 result
# Add a coarse transcript class column, `genetype2`.
#
# Rules are tried top to bottom and the first one that is TRUE wins:
#   protein_coding on chrM              -> 'mt-mRNA'
#   protein_coding elsewhere            -> 'mRNA'
#   chrM and gene_name has 'MT-RNR'     -> 'mt-rRNA'
#   chrM and gene_name has 'MT-T'       -> 'mt-tRNA'
#   gene_type is NA                     -> 'unannotated gene'
#   anything else                       -> 'other ncRNAs'
#
# df: data frame with `gene_type`, `seqname` and `gene_name` columns.
# Returns `df` with the extra character column `genetype2`.
add_genetype2 <- function(df) {
  mutate(
    df,
    genetype2 = case_when(
      seqname == 'chrM' & gene_type == 'protein_coding' ~ 'mt-mRNA',
      seqname != 'chrM' & gene_type == 'protein_coding' ~ 'mRNA',
      # Both patterns are plain substrings, so fixed matching is equivalent.
      seqname == 'chrM' & grepl('MT-RNR', gene_name, fixed = TRUE) ~ 'mt-rRNA',
      seqname == 'chrM' & grepl('MT-T', gene_name, fixed = TRUE) ~ 'mt-tRNA',
      is.na(gene_type) ~ 'unannotated gene',
      .default = 'other ncRNAs'
    )
  )
}
# Flag differentially expressed transcripts (DETs) for the two siRNAs (G and I).
#
# A transcript is a "hit" for one siRNA in a given direction when its p-value is
# below `alpha` and its log2 fold change has the matching sign. `isUp` and
# `isDown` then take one of:
#   'common' - hit for both siRNAs
#   'only G' - hit for siRNA G only
#   'only I' - hit for siRNA I only
#   'not'    - no hit (also the result when the needed values are NA)
# `common_DETs` is 'up' / 'down' when `isUp` / `isDown` is 'common', else 'other'.
#
# The classification is fully vectorised; the earlier row-by-row evaluation
# (rowwise() + scalar max()/min()) gave the same labels but ran case_when()
# once per row.
#
# NOTE(review): the calls use the raw `*_pvalue` columns, not `*_padj` —
# confirm that this is intended.
#
# df:    data frame with siMETTL2A_{G,I}_pvalue and
#        siMETTL2A_{G,I}_log2FoldChange columns.
# alpha: p-value cutoff (default .05, the value previously hard-coded).
# Returns an ungrouped tibble: `df` plus `isUp`, `isDown` and `common_DETs`.
add_isDET <- function(df, alpha = .05) {
  # Combine the per-siRNA hit flags into one label. NA flags never match a
  # rule, so such rows fall through exactly as in the scalar version.
  label_hits <- function(hit_G, hit_I) {
    case_when(
      hit_G & hit_I ~ 'common',
      hit_G ~ 'only G',
      hit_I ~ 'only I',
      .default = 'not'
    )
  }

  df |>
    # Match the previous return type (rowwise() |> ungroup() gave a plain tibble).
    ungroup() |>
    as_tibble() |>
    mutate(
      isUp = label_hits(
        siMETTL2A_G_pvalue < .env$alpha & siMETTL2A_G_log2FoldChange > 0,
        siMETTL2A_I_pvalue < .env$alpha & siMETTL2A_I_log2FoldChange > 0
      ),
      isDown = label_hits(
        siMETTL2A_G_pvalue < .env$alpha & siMETTL2A_G_log2FoldChange < 0,
        siMETTL2A_I_pvalue < .env$alpha & siMETTL2A_I_log2FoldChange < 0
      ),
      common_DETs = case_when(
        isUp == 'common' ~ 'up',
        isDown == 'common' ~ 'down',
        .default = 'other'
      )
    )
}
# Count rows per group and express each count as a percentage of the total.
#
# df: a data frame, normally already grouped with group_by() by the caller.
# Returns one row per group with the grouping columns, `n` (row count) and
# `percentage` (100 * n / sum of all n), sorted by decreasing percentage.
calc_percentage <- function(df) {
  group_sizes <- reframe(df, n = n())
  with_share <- mutate(group_sizes, percentage = 100 * n / sum(n))
  arrange(with_share, -percentage)
}
# Load the per-transcript DESeq2 table and annotate every row with
# `genetype2` (add_genetype2) and the isUp / isDown / common_DETs calls
# (add_isDET). This object is the input for everything below.
espresso_deseq2_genetype2_isDET <-
read_tsv(paste0(wd, 'Tables/Espresso/espresso_DESeq2_2024-03-30.tsv')) |>
add_genetype2() |>
add_isDET()
## Rows: 36717 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (7): transcript_id, transcript_type, transcript_name, gene_id, gene_typ...
## dbl (18): siMETTL2A_baseMean, siMETTL2A_log2FoldChange, siMETTL2A_lfcSE, siM...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Write the annotated table to tabledir.
# NOTE(review): export_tsv() is not defined in this file (presumably from
# myUtilities). The logged path suggests the file name is built from the piped
# object's name plus the date, so keep this variable name stable — confirm.
espresso_deseq2_genetype2_isDET |>
export_tsv(outdir = tabledir)
##
## Exported to: /Users/s-mitsutomi/My Drive (shuheimitsutomi@ric.u-tokyo.ac.jp)/Analysis/METTL2A/Tables/Espresso/espresso_deseq2_genetype2_isDET_2024-04-18.tsv
## # A tibble: 36,717 × 29
## transcript_id transcript_type transcript_name gene_id gene_type gene_name
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 ENST00000498442.1 retained_intron CRBN-212 ENSG00… protein_… CRBN
## 2 ENST00000459840.5 retained_intron CRBN-205 ENSG00… protein_… CRBN
## 3 ENST00000231948.9 protein_coding CRBN-201 ENSG00… protein_… CRBN
## 4 ENST00000432408.6 protein_coding CRBN-203 ENSG00… protein_… CRBN
## 5 ENST00000339437.… protein_coding TRNT1-203 ENSG00… protein_… TRNT1
## 6 ENST00000488263.5 retained_intron CRBN-209 ENSG00… protein_… CRBN
## 7 ENST00000420393.5 protein_coding TRNT1-207 ENSG00… protein_… TRNT1
## 8 ENST00000698415.1 retained_intron TRNT1-230 ENSG00… protein_… TRNT1
## 9 ENST00000450014.1 protein_coding CRBN-204 ENSG00… protein_… CRBN
## 10 ENST00000698416.1 retained_intron TRNT1-231 ENSG00… protein_… TRNT1
## # ℹ 36,707 more rows
## # ℹ 23 more variables: siMETTL2A_baseMean <dbl>,
## # siMETTL2A_log2FoldChange <dbl>, siMETTL2A_lfcSE <dbl>,
## # siMETTL2A_stat <dbl>, siMETTL2A_pvalue <dbl>, siMETTL2A_padj <dbl>,
## # siMETTL2A_I_baseMean <dbl>, siMETTL2A_I_log2FoldChange <dbl>,
## # siMETTL2A_I_lfcSE <dbl>, siMETTL2A_I_stat <dbl>, siMETTL2A_I_pvalue <dbl>,
## # siMETTL2A_I_padj <dbl>, siMETTL2A_G_baseMean <dbl>, …
Expression level
# Histogram of mean expression (siMETTL2A_baseMean) on a log10 x axis, with
# reference lines at 0.1 and 1.
ggplot(espresso_deseq2_genetype2_isDET, aes(x = siMETTL2A_baseMean)) +
  geom_histogram() +
  geom_vline(xintercept = c(0.1, 1)) +
  scale_x_log10()
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 340 rows containing non-finite outside the scale range
## (`stat_bin()`).

Correlation between two siRNAs
# Compare the log2 fold changes of the two siRNAs: G on x, I on y, both axes
# limited to [-10, 10].
# NOTE(review): plot_2dhistogram() is not defined in this file (presumably from
# myUtilities). `save_outdir = figdir` presumably makes it save the figure, and
# the logged output suggests it also reports Spearman and Pearson correlations —
# confirm against the helper.
espresso_deseq2_genetype2_isDET |>
plot_2dhistogram(
x = siMETTL2A_G_log2FoldChange, y = siMETTL2A_I_log2FoldChange,
save_outdir = figdir, base_size = 7, width = 6, height = 6,
axis_lim = c(-10, 10)
)
## Warning in cor.test.default(x = mf[[1L]], y = mf[[2L]], ...): Cannot compute
## exact p-value with ties
## # A tibble: 2 × 9
## estimate statistic p.value method method_short alternative parameter conf.low
## <dbl> <dbl> <dbl> <chr> <chr> <chr> <int> <dbl>
## 1 0.440 2.72e12 0 Spearm… Spearman two.sided NA NA
## 2 0.474 9.43e 1 0 Pearso… Pearson two.sided 30784 0.465
## # ℹ 1 more variable: conf.high <dbl>
## # A tibble: 2 × 1
## msg
## <chr>
## 1 Spearman: r = 0.44, p < 2.2e-16
## 2 Pearson: r = 0.47, p < 2.2e-16
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin2d()`).
## Warning: Removed 1 row containing non-finite outside the scale range (`stat_bin2d()`).
## Removed 1 row containing non-finite outside the scale range (`stat_bin2d()`).
## Removed 1 row containing non-finite outside the scale range (`stat_bin2d()`).
## Removed 1 row containing non-finite outside the scale range (`stat_bin2d()`).

# Hexbin version of the G-vs-I log2 fold change comparison. Fill is the bin
# count on a log10 colour scale; both axes are limited to [-10, 10], so rows
# outside that range (or with NA fold changes) are dropped with a warning.
espresso_deseq2_2dhistogram <-
  ggplot(
    espresso_deseq2_genetype2_isDET,
    aes(x = siMETTL2A_G_log2FoldChange, y = siMETTL2A_I_log2FoldChange)
  ) +
  geom_hex(bins = 50) +
  lims(x = c(-10, 10), y = c(-10, 10)) +
  scale_fill_viridis_c(trans = 'log10') +
  tune::coord_obs_pred(ratio = 1)
# The plot object is passed by name, exactly as the pipe form would pass it.
ggsave_multiple_formats(
  espresso_deseq2_2dhistogram,
  width = 5, height = 5, fontsize = 7, outdir = figdir
)
## Warning: Removed 5932 rows containing non-finite outside the scale range
## (`stat_binhex()`).
## Warning: Removed 5932 rows containing non-finite outside the scale range
## (`stat_binhex()`).
## Removed 5932 rows containing non-finite outside the scale range
## (`stat_binhex()`).
## Removed 5932 rows containing non-finite outside the scale range
## (`stat_binhex()`).
## Removed 5932 rows containing non-finite outside the scale range
## (`stat_binhex()`).

# Share of transcripts in every isUp x isDown combination.
calc_percentage(
  group_by(espresso_deseq2_genetype2_isDET, isUp, isDown)
)
## # A tibble: 9 × 4
## isUp isDown n percentage
## <chr> <chr> <int> <dbl>
## 1 not not 32016 87.2
## 2 not only I 1059 2.88
## 3 only I not 872 2.37
## 4 only G not 862 2.35
## 5 not only G 677 1.84
## 6 not common 540 1.47
## 7 common not 438 1.19
## 8 only G only I 159 0.433
## 9 only I only G 94 0.256
# Share of transcripts per isUp label.
calc_percentage(
  group_by(espresso_deseq2_genetype2_isDET, isUp)
)
## # A tibble: 4 × 3
## isUp n percentage
## <chr> <int> <dbl>
## 1 not 34292 93.4
## 2 only G 1021 2.78
## 3 only I 966 2.63
## 4 common 438 1.19
# Share of transcripts per isDown label.
calc_percentage(
  group_by(espresso_deseq2_genetype2_isDET, isDown)
)
## # A tibble: 4 × 3
## isDown n percentage
## <chr> <int> <dbl>
## 1 not 34188 93.1
## 2 only I 1218 3.32
## 3 only G 771 2.10
## 4 common 540 1.47
# Share of transcripts per common_DETs class (up / down / other).
calc_percentage(
  group_by(espresso_deseq2_genetype2_isDET, common_DETs)
)
## # A tibble: 3 × 3
## common_DETs n percentage
## <chr> <int> <dbl>
## 1 other 35739 97.3
## 2 down 540 1.47
## 3 up 438 1.19
# of genetypes in DETs
# Transcripts called up-regulated by both siRNAs, written to tabledir.
espresso_deseq2_commonup <- filter(
  espresso_deseq2_genetype2_isDET,
  common_DETs == 'up'
)
# The object is passed by name, exactly as the pipe form would pass it.
export_tsv(espresso_deseq2_commonup, outdir = tabledir)
##
## Exported to: /Users/s-mitsutomi/My Drive (shuheimitsutomi@ric.u-tokyo.ac.jp)/Analysis/METTL2A/Tables/Espresso/espresso_deseq2_commonup_2024-04-18.tsv
## # A tibble: 438 × 29
## transcript_id transcript_type transcript_name gene_id gene_type gene_name
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 ENST00000307839.… protein_coding RPL15-201 ENSG00… protein_… RPL15
## 2 ENST00000306627.8 protein_coding UBE2E1-201 ENSG00… protein_… UBE2E1
## 3 ENST00000438607.2 protein_coding TMA7-201 ENSG00… protein_… TMA7
## 4 ENST00000477624.1 retained_intron TMA7-202 ENSG00… protein_… TMA7
## 5 ENST00000273258.4 protein_coding ARL6IP5-201 ENSG00… protein_… ARL6IP5
## 6 ENST00000355354.… protein_coding CD47-201 ENSG00… protein_… CD47
## 7 ENST00000264538.4 protein_coding IFT57-201 ENSG00… protein_… IFT57
## 8 ENST00000265062.8 protein_coding RAB7A-201 ENSG00… protein_… RAB7A
## 9 ENST00000451728.6 protein_coding CNBP-204 ENSG00… protein_… CNBP
## 10 ENST00000354910.… protein_coding ANAPC13-201 ENSG00… protein_… ANAPC13
## # ℹ 428 more rows
## # ℹ 23 more variables: siMETTL2A_baseMean <dbl>,
## # siMETTL2A_log2FoldChange <dbl>, siMETTL2A_lfcSE <dbl>,
## # siMETTL2A_stat <dbl>, siMETTL2A_pvalue <dbl>, siMETTL2A_padj <dbl>,
## # siMETTL2A_I_baseMean <dbl>, siMETTL2A_I_log2FoldChange <dbl>,
## # siMETTL2A_I_lfcSE <dbl>, siMETTL2A_I_stat <dbl>, siMETTL2A_I_pvalue <dbl>,
## # siMETTL2A_I_padj <dbl>, siMETTL2A_G_baseMean <dbl>, …
# Transcripts called down-regulated by both siRNAs, written to tabledir.
espresso_deseq2_commondown <- filter(
  espresso_deseq2_genetype2_isDET,
  common_DETs == 'down'
)
# The object is passed by name, exactly as the pipe form would pass it.
export_tsv(espresso_deseq2_commondown, outdir = tabledir)
##
## Exported to: /Users/s-mitsutomi/My Drive (shuheimitsutomi@ric.u-tokyo.ac.jp)/Analysis/METTL2A/Tables/Espresso/espresso_deseq2_commondown_2024-04-18.tsv
## # A tibble: 540 × 29
## transcript_id transcript_type transcript_name gene_id gene_type gene_name
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 ENST00000301964.7 protein_coding TADA3-201 ENSG00… protein_… TADA3
## 2 ENST00000383817.5 protein_coding CIDEC-202 ENSG00… protein_… CIDEC
## 3 ENST00000344629.… protein_coding OGG1-205 ENSG00… protein_… OGG1
## 4 ENST00000306024.4 protein_coding LSM3-201 ENSG00… protein_… LSM3
## 5 ENST00000479563.5 retained_intron RPL14-208 ENSG00… protein_… RPL14
## 6 ENST00000338970.… protein_coding RPL14-201 ENSG00… protein_… RPL14
## 7 ENST00000383729.9 protein_coding P4HTM-202 ENSG00… protein_… P4HTM
## 8 ENST00000326739.9 protein_coding IMPDH2-201 ENSG00… protein_… IMPDH2
## 9 ENST00000308388.7 protein_coding GMPPB-202 ENSG00… protein_… GMPPB
## 10 ENST00000417626.8 protein_coding IFRD2-202 ENSG00… protein_… IFRD2
## # ℹ 530 more rows
## # ℹ 23 more variables: siMETTL2A_baseMean <dbl>,
## # siMETTL2A_log2FoldChange <dbl>, siMETTL2A_lfcSE <dbl>,
## # siMETTL2A_stat <dbl>, siMETTL2A_pvalue <dbl>, siMETTL2A_padj <dbl>,
## # siMETTL2A_I_baseMean <dbl>, siMETTL2A_I_log2FoldChange <dbl>,
## # siMETTL2A_I_lfcSE <dbl>, siMETTL2A_I_stat <dbl>, siMETTL2A_I_pvalue <dbl>,
## # siMETTL2A_I_padj <dbl>, siMETTL2A_G_baseMean <dbl>, …
# Turn a `percentage` column into stacked [ymin, ymax] intervals on a 0-1 scale.
#
# `ymax` is the running total of percentage / 100 over the rows in their
# current order; `ymin` is the previous row's `ymax` (0 for the first row), so
# consecutive rows tile the range without gaps.
#
# The running total always spans all rows and ignores any dplyr grouping, so
# `ymin` and `ymax` stay consistent with each other. A zero-row input returns
# zero rows instead of failing on a length-1 `ymin`.
#
# df: data frame with a numeric `percentage` column.
# Returns `df` with `ymax` and `ymin` columns added (or overwritten).
add_yrange <- function(df) {
  stopifnot(is.data.frame(df), 'percentage' %in% names(df))

  upper <- cumsum(df$percentage / 100)
  df$ymax <- upper
  # Shift by one row; indexing with seq_along() keeps the length equal to
  # nrow(df), including when there are no rows at all.
  df$ymin <- c(0, upper)[seq_along(upper)]
  df
}
# Draw a donut chart of `genetype2` shares.
#
# Each row becomes a rectangle spanning x 2-4 and y ymin-ymax (from
# add_yrange()), which polar coordinates on y turn into ring segments. Labels
# are placed at x = 1, at the middle of each segment's y range.
#
# df:           data frame with `genetype2` and `percentage` columns.
# color_values: colours passed as `values` to both the fill and colour scales.
# Returns a ggplot object.
donutplot_genetype2 <- function(df, color_values) {
  ring_data <- add_yrange(df)
  ring_aes <- aes(
    xmin = 2, xmax = 4, ymin = ymin, ymax = ymax,
    fill = genetype2, colour = genetype2
  )
  label_aes <- aes(label = genetype2, y = (ymin + ymax) / 2)

  ggplot(ring_data, ring_aes) +
    geom_rect() +
    ggrepel::geom_text_repel(label_aes, x = 1) +
    coord_polar(theta = 'y') +
    # x starts below the ring's inner edge to leave the hole in the middle.
    xlim(c(-1, 4)) +
    scale_fill_manual(values = color_values) +
    scale_color_manual(values = color_values) +
    theme_void()
}
# genetype2 composition of the common up-regulated transcripts, with the
# stacked ymin/ymax ranges added; printed for inspection.
espresso_deseq2_commonups_genetype_summary <-
  add_yrange(
    calc_percentage(
      group_by(espresso_deseq2_commonup, genetype2)
    )
  )
espresso_deseq2_commonups_genetype_summary
## # A tibble: 5 × 5
## genetype2 n percentage ymax ymin
## <chr> <int> <dbl> <dbl> <dbl>
## 1 mRNA 401 91.6 0.916 0
## 2 other ncRNAs 16 3.65 0.952 0.916
## 3 mt-mRNA 11 2.51 0.977 0.952
## 4 unannotated gene 8 1.83 0.995 0.977
## 5 mt-rRNA 2 0.457 1 0.995
# genetype2 composition of the common down-regulated transcripts, with the
# stacked ymin/ymax ranges added; printed for inspection.
espresso_deseq2_commondowns_genetype_summary <-
  add_yrange(
    calc_percentage(
      group_by(espresso_deseq2_commondown, genetype2)
    )
  )
espresso_deseq2_commondowns_genetype_summary
## # A tibble: 3 × 5
## genetype2 n percentage ymax ymin
## <chr> <int> <dbl> <dbl> <dbl>
## 1 mRNA 527 97.6 0.976 0
## 2 other ncRNAs 11 2.04 0.996 0.976
## 3 unannotated gene 2 0.370 1 0.996
# Donut chart of the genetype2 composition of the common up-regulated
# transcripts, saved under figdir.
# Colours are named by category rather than given positionally: an unnamed
# vector is assigned in sorted category order, so a category appearing or
# disappearing would silently shift every colour after it. The names are the
# five categories present in the summary above, with the same colours as before.
espresso_deseq2_commonups_genetype_donutplot <-
  espresso_deseq2_commonups_genetype_summary |>
  donutplot_genetype2(
    color_values = c(
      'mRNA' = '#3e98f2',
      'mt-mRNA' = '#f2983e',
      'mt-rRNA' = '#f23e98',
      'other ncRNAs' = 'grey30',
      'unannotated gene' = '#3ef298'
    )
  )
espresso_deseq2_commonups_genetype_donutplot |>
  ggsave_multiple_formats(
    outdir = figdir, width = 5, height = 5, fontsize = 7
  )

# Donut chart of the genetype2 composition of the common down-regulated
# transcripts, saved under figdir.
# Colours are named by category rather than given positionally: an unnamed
# vector is assigned in sorted category order, so a category appearing or
# disappearing would silently shift every colour after it. The names are the
# three categories present in the summary above, with the same colours as before.
espresso_deseq2_commondowns_genetype_donutplot <-
  espresso_deseq2_commondowns_genetype_summary |>
  donutplot_genetype2(
    color_values = c(
      'mRNA' = '#3e98f2',
      'other ncRNAs' = 'grey30',
      'unannotated gene' = '#3ef298'
    )
  )
espresso_deseq2_commondowns_genetype_donutplot |>
  ggsave_multiple_formats(
    outdir = figdir, width = 5, height = 5, fontsize = 7
  )

% of DETs among genetypes
# For each genetype2, the percentage of its transcripts in each common_DETs
# class (percentages sum to 100 within a genetype2). The result stays grouped
# by genetype2.
espresso_deseq2_DETs_genetype_summary <-
  espresso_deseq2_genetype2_isDET |>
  count(genetype2, common_DETs) |>
  group_by(genetype2) |>
  mutate(percent = 100 * n / sum(n))
espresso_deseq2_DETs_genetype_summary
## # A tibble: 13 × 4
## # Groups: genetype2 [6]
## genetype2 common_DETs n percent
## <chr> <chr> <int> <dbl>
## 1 mRNA down 527 1.69
## 2 mRNA other 30234 97.0
## 3 mRNA up 401 1.29
## 4 mt-mRNA other 2 15.4
## 5 mt-mRNA up 11 84.6
## 6 mt-rRNA up 2 100
## 7 mt-tRNA other 7 100
## 8 other ncRNAs down 11 0.238
## 9 other ncRNAs other 4590 99.4
## 10 other ncRNAs up 16 0.347
## 11 unannotated gene down 2 0.218
## 12 unannotated gene other 906 98.9
## 13 unannotated gene up 8 0.873
# Horizontal stacked bars: one bar per genetype2 (order reversed so the first
# category is on top), split by common_DETs class, with the percent axis
# reversed. Saved under figdir.
# Fill colours are named by class so the mapping does not depend on which
# classes happen to be present; the colours themselves are unchanged.
espresso_deseq2_DETs_genetype_summary_barplot <-
  espresso_deseq2_DETs_genetype_summary |>
  ggplot(aes(x = genetype2 |> fct_rev(),
             y = percent, fill = common_DETs)) +
  geom_bar(stat = 'identity') +
  coord_flip() +
  scale_y_reverse() +
  scale_fill_manual(
    values = c('down' = '#3e3ef2', 'other' = 'grey', 'up' = '#f23e3e')
  )
espresso_deseq2_DETs_genetype_summary_barplot |>
  ggsave_multiple_formats(outdir = figdir, width = 5, height = 4, fontsize = 7)

# For each common_DETs class, the percentage of its transcripts belonging to
# each genetype2 (percentages sum to 100 within a class). The result stays
# grouped by common_DETs.
espresso_deseq2_DETs_genetype_summary2 <-
  espresso_deseq2_genetype2_isDET |>
  count(genetype2, common_DETs) |>
  group_by(common_DETs) |>
  mutate(percent = 100 * n / sum(n))
# Horizontal stacked bars: one bar per common_DETs class (order reversed),
# split by genetype2, with the percent axis reversed. Saved under figdir.
# Fill colours are named by genetype2 so each category keeps its colour even
# if another category is absent from the data; the colours are unchanged.
espresso_deseq2_DETs_genetype_summary_barplot2 <-
  espresso_deseq2_DETs_genetype_summary2 |>
  ggplot(aes(x = common_DETs |> fct_rev(),
             y = percent, fill = genetype2)) +
  geom_bar(stat = 'identity') +
  coord_flip() +
  scale_y_reverse() +
  scale_fill_manual(
    values = c(
      'mRNA' = '#3e98f2',
      'mt-mRNA' = '#f2983e',
      'mt-rRNA' = '#f23e98',
      'mt-tRNA' = '#983ef2',
      'other ncRNAs' = 'grey30',
      'unannotated gene' = '#3ef298'
    )
  )
espresso_deseq2_DETs_genetype_summary_barplot2 |>
  ggsave_multiple_formats(outdir = figdir, width = 5, height = 4, fontsize = 7)
